Data cleaning and auditing

Author

Florencia Grattarola

Published

April 16, 2025

library(httr)
library(jsonlite)
library(countrycode)
library(janitor)
library(readxl)
library(sf)
sf_use_s2(FALSE)
library(tmap)
tmap_mode('view')
library(tidyverse)
options(knitr.kable.NA = '')

Data cleaning

Read data

# Load the working copy of the metadata sheet; guess_max is raised so column
# types are guessed from up to 4000 rows.
raw_metadata <- read_xlsx(
  path = 'data/draft/metada_work_version.xlsx',
  guess_max = 4000
)

Check columns

# Standardise the column names, then drop rows and columns that are
# completely empty.
raw_metadata <- janitor::remove_empty(
  janitor::clean_names(raw_metadata),
  c('rows', 'cols')
)

Check source fields

The fields are: name_orig, format, and language.

  • Make sure there are no \r, \n, or other stray characters, and no typos.
  • Capitalise and clean language.
# name
# Sources whose name is still a URL, with the number of rows for each.
raw_metadata %>% 
  filter(grepl('http', name_orig)) %>% 
  group_by(name_orig) %>% 
  count()

# Rows without a name: list the URLs they come from.
raw_metadata %>% 
  filter(is.na(name_orig)) %>% 
  distinct(url_clean)

# Cleaned names (whitespace squished, backslashes and double quotes removed)
# and the number of distinct countries each one appears in.
raw_metadata %>% 
  mutate(name_orig = str_squish(name_orig)) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\"")) %>% 
  group_by(name_orig) %>% 
  summarise(n_countries = n_distinct(country)) %>% 
  print(n = 10)

# format
# The literal string 'NA' is treated as missing.
raw_metadata %>% 
  mutate(format = ifelse(format == 'NA', NA, str_squish(format))) %>% 
  distinct(format)

# language
# Every '/' or '|' separator is rewritten as ' | '. All occurrences are
# replaced and the result is squished again, so 'a/b/c', 'a|b' and 'a | b'
# all end up in the same ' | '-separated form instead of appearing as
# different values in distinct().
raw_metadata %>% 
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>% 
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>% 
  distinct(language) %>% 
  print(n = 50)

DOUBTS

The name_orig is: https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/.
The id values are: [1] 446 447 448 449 450 451 452 453 454 455 456 457.

FIX

# French: Liste rouge des Amphibiens du Grand Est
# English: Red list of Mammals of Grand Est    

# Preview of the replacement names for the Grand Est records whose name_orig
# is still a URL. The result is only printed, not assigned.
raw_metadata %>% 
  # `%in%` rather than `==`: a missing state_province must count as "not
  # Grand Est". With `==` the condition is NA for such rows and ifelse()
  # would overwrite their name_orig with NA.
  mutate(name_orig = ifelse(grepl('htt', name_orig) & 
                               state_province %in% 'Grand Est',
                             str_glue('Red list of {group} of Grand Est'), name_orig)) %>% 
  # NOTE(review): after the step above the Grand Est names no longer contain
  # 'htt' (unless the group itself does), so these French-name branches look
  # like placeholders that currently never fire -- confirm before filling in
  # the empty strings.
  mutate(name_orig = case_when(grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Birds' ~ 
                                 'Liste rouge des Oiseaux du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Mammals' ~ 
                                 'Liste rouge des XXX du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Orthoptera' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Fishes' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Butterflies' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Night butterflies' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Ladybugs' ~ 
                                 '',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Branchiopoda' ~ 
                                 '',
                               .default = name_orig)) %>% 
  # The pipe above was missing, which left filter() as a separate statement
  # with no data argument.
  filter(state_province == 'Grand Est') %>% 
  select(group, name_orig)

# language: 'NA' strings become missing and every '/' or '|' separator is
# rewritten as ' | '. All occurrences are replaced and the result is squished
# again, so values that already use ' | ' are not given double spaces.
raw_metadata %>% 
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>% 
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>% 
  distinct(language)

# format: a '?' placeholder is treated as missing.
raw_metadata %>% 
  mutate(format = ifelse(format == '?', NA, format)) %>% 
  distinct(format)

# name_orig: squish whitespace, then strip backslashes and double quotes.
raw_metadata %>% 
  mutate(name_orig = str_squish(name_orig)) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\"")) 

Check Location fields

The fields are: continent, country, state_province, gadm_level_1, gadm_level_2, region_custom, region_detail, and iso_2.

  • Make sure there are no typos, since they would create duplicate entries.
  • Capitalise continent, country, state_province names.
  • Check ISO codes.
  • Check GADM levels.
  • Clean region_custom and region_detail.
# check continent
# Separators become ' | ', underscores become spaces, and names are
# title-cased before the distinct values are listed.
raw_metadata %>%
  mutate(
    continent = str_squish(continent),
    continent = str_replace_all(continent, '\\|', ' | '),
    continent = str_squish(continent),
    continent = str_replace_all(continent, '_', ' '),
    continent = str_to_title(continent)
  ) %>%
  distinct(continent)

# check country
# 'NA' strings and 'USSR' become missing, underscores become spaces, and
# names are title-cased with the connecting words 'and', 'of', 'the' kept in
# lower case. The patterns are anchored with word boundaries so that only
# whole words are lower-cased, not the first letters of a longer word that
# happens to start with 'Of' or 'The'.
raw_metadata %>% 
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>% 
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', NA, str_to_title(country))) %>%
  mutate(country = str_replace_all(country, '\\bAnd\\b', 'and')) %>% 
  mutate(country = str_replace_all(country, '\\bOf\\b', 'of')) %>% 
  mutate(country = str_replace_all(country, '\\bThe\\b', 'the')) %>% 
  distinct(country)

# check state_province
# Lists the records whose cleaned state_province differs from gadm_level_1.
# 'and', 'of', 'the' are lower-cased only as whole words (word boundaries),
# so names that merely start with those letters keep their capital.
raw_metadata %>% 
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>% 
  mutate(state_province = str_replace_all(state_province, '\\bAnd\\b', 'and')) %>% 
  mutate(state_province = str_replace_all(state_province, '\\bOf\\b', 'of')) %>% 
  mutate(state_province = str_replace_all(state_province, '\\bThe\\b', 'the')) %>% 
  filter(!is.na(state_province)) %>% 
  # rows where gadm_level_1 is missing are dropped here too (NA comparison)
  filter(state_province != gadm_level_1) %>% 
  distinct(country, state_province, gadm_level_1, iso_2, iso_3) %>% 
  print(n = 100)

# check gadm_level_1 and gadm_level_2
# Level 1: 'NA' strings become missing, names are title-cased, and the whole
# words 'and', 'of', 'the' are put back in lower case. Word boundaries keep
# names that only start with those letters intact.
raw_metadata %>% 
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bAnd\\b', 'and')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bOf\\b', 'of')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bThe\\b', 'the')) %>% 
  filter(!is.na(gadm_level_1)) %>% 
  distinct(gadm_level_1) %>% 
  arrange(gadm_level_1) %>% 
  print(n = 100)

# Level 2: same missing-value handling and title case; all non-missing values
# are shown (no de-duplication).
raw_metadata %>% 
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>% 
  filter(!is.na(gadm_level_2)) %>% 
  select(gadm_level_2)

# check region_custom and region_detail
# 'NA' strings become missing; '|' separators in region_detail are spaced
# out. Only records that have a region_custom are listed.
raw_metadata %>%
  mutate(
    region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom)),
    region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail)),
    region_detail = str_replace_all(str_squish(region_detail), '\\|', ' | '),
    region_detail = str_squish(region_detail)
  ) %>%
  filter(!is.na(region_custom)) %>%
  distinct(region_custom, region_detail, iso_2, iso_3) %>%
  print(n = 100)

# check iso_2 and iso_3
# Shows the records left without an ISO-2 code. The string 'NA' is kept for
# Namibia and treated as missing everywhere else.
raw_metadata %>%
  janitor::clean_names() %>%
  janitor::remove_empty(c('rows', 'cols')) %>%
  mutate(
    iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2)),
    iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | ')),
    iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3)),
    iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))
  ) %>%
  select(country, iso_2, iso_3, region_custom, region_detail) %>%
  filter(is.na(iso_2)) #%>% distinct()

DOUBTS

Check Taxon fields

The fields are: kingdom, phylum, subphylum, class, order, and group.

  • Check duplicates in all fields.
  • Rename group to taxa.

I matched the names against the GBIF backbone taxonomy using a custom function, nameMatcherGBIF().

# gbif name parser
#' Match names against the GBIF `species/match` endpoint.
#'
#' Each name is sent as a strict, non-verbose match request and the fields of
#' interest are collected into one tibble.
#'
#' @param sp_name_list Character vector (or list) of names to look up.
#' @return A tibble with one row per element of `sp_name_list` and the
#'   character columns sp_name, scientificName, kingdom, phylum, class,
#'   order, family, genus, specificEpithet, species, status and rank. Fields
#'   missing from the response are NA. All fields are NA when the response
#'   has matchType 'NONE' (or no matchType), or when the name itself is NA,
#'   in which case no request is sent.
#' @details Stops with an HTTP error message if a request fails.
nameMatcherGBIF <- function(sp_name_list) {
  
  # api <- 'http://api.gbif.org/v1/parser/name'
  api <- 'http://api.gbif.org/v1/species/match'
  
  fields <- c('scientificName', 'kingdom', 'phylum', 'class', 'order',
              'family', 'genus', 'specificEpithet', 'species', 'status',
              'rank')
  
  # One-row tibble for `sp_name` built from a named list of field values.
  make_row <- function(sp_name, values) {
    as_tibble(c(list(sp_name = as.character(sp_name)), values))
  }
  
  # Field values for a name that could not be matched: all NA.
  na_values <- setNames(rep(list(NA_character_), length(fields)), fields)
  
  # Query GBIF for one name and return its one-row tibble.
  match_one <- function(sp_name) {
    # cat(sp_name, '\n')
    if (is.na(sp_name)) {
      return(make_row(sp_name, na_values))
    }
    
    # Passing the parameters through `query` lets httr escape them, so names
    # containing '&', '+', '#', ... reach the API intact.
    response <- GET(url = api,
                    query = list(name = sp_name,
                                 strict = 'true',
                                 verbose = 'false'))
    stop_for_status(response)
    
    parsed <- fromJSON(content(response, as = 'text', encoding = 'UTF-8'),
                       flatten = TRUE)
    
    # `[[` (exact matching) instead of `$`, which partially matches list names
    match_type <- parsed[['matchType']]
    if (is.null(match_type) || match_type == 'NONE') {
      return(make_row(sp_name, na_values))
    }
    
    values <- lapply(fields, function(field) {
      value <- parsed[[field]]
      if (is.null(value)) NA_character_ else as.character(value)[1]
    })
    make_row(sp_name, setNames(values, fields))
  }
  
  # Zero-row template so the column set and types are fixed even when
  # `sp_name_list` is empty.
  template <- as_tibble(setNames(rep(list(character()), length(fields) + 1),
                                 c('sp_name', fields)))
  
  # Build all rows first and bind once instead of growing a tibble in a loop.
  bind_rows(c(list(template), lapply(sp_name_list, match_one)))
}

# Names to send to GBIF: the `group` of every distinct taxonomy combination,
# trimmed and title-cased with the whole words 'and', 'of', 'the' in lower
# case. Word boundaries stop the replacement from lower-casing group names
# that merely begin with 'Of' or 'The'.
sp_list <- raw_metadata %>% 
  distinct(kingdom, phylum, subphylum, class, order, group) %>% 
  mutate(group = str_trim(group)) %>% 
  mutate(group = str_to_title(group)) %>% 
  mutate(group = str_replace_all(group, '\\bAnd\\b', 'and')) %>% 
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>% 
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  pull(group)

# Look every group name up in GBIF (messages from the requests are muted).
sp_list_matched <- nameMatcherGBIF(sp_list) %>% suppressMessages()

# Manual overrides on top of the GBIF result:
#   - names containing 'flora' are assigned to Plantae,
#   - names containing 'fauna' are assigned to Animalia,
#   - names containing 'tunicata' are reset to NA (treated as unmatched).
# The first matching rule wins. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
sp_list_matched <- sp_list_matched %>% 
  mutate(scientificName = case_when(grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
                                    grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
                                    grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA,
                                    .default = scientificName)) %>% 
  mutate(kingdom = case_when(grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
                             grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
                             grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA,
                             .default = kingdom)) %>% 
  mutate(phylum = ifelse(phylum == 'chordata', 'Chordata', phylum))

# Names still without a scientificName after the overrides.
sp_list_unmatched <- sp_list_matched %>% 
  filter(is.na(scientificName)) %>% 
  pull(sp_name)

# Number of looked-up names that ended up with a kingdom assigned.
sp_list_matched %>% filter(!is.na(kingdom)) %>% nrow() # matched
[1] 205
# Number of looked-up names left without a scientificName.
length(sp_list_unmatched) # not matched
[1] 358

When the taxon name (i.e., group) was not found I kept the previous taxonomic fields’ values.

# One row per (trimmed) group found in the metadata, with the GBIF fields
# attached where a name was matched. The join key is stated explicitly:
# `group` is the only column the two tables share.
merged_list <- left_join(raw_metadata %>%
                           mutate(group = str_trim(group)) %>%
                           distinct(group) %>%
                           arrange(group),
                         sp_list_matched %>%
                           filter(!is.na(scientificName)) %>%
                           rename(group = sp_name) %>% 
                           distinct(),
                         by = 'group') %>%
  arrange(group)

# Taxonomy as originally recorded in the metadata: the first row seen for
# each group. Used as the fallback for groups GBIF could not match.
raw_metadata_taxon_list <- raw_metadata %>%
  mutate(group = str_trim(group)) %>% 
  distinct(group, .keep_all = TRUE) %>% 
  select(kingdom, phylum, subphylum, class, order, group) %>% 
  arrange(group)

# Final taxonomy table: GBIF values for matched groups, the original
# metadata values for unmatched ones.
# NOTE(review): the outer join still uses the natural join key(s); which
# columns raw_metadata shares with the lookup besides `group` is not visible
# here, so the key is left implicit -- confirm and make it explicit.
left_join(raw_metadata %>% mutate(group = str_trim(group)) %>%
            select(-c(kingdom, phylum, subphylum, class, order)),
          bind_rows(merged_list %>% 
                      filter(!is.na(scientificName)),
                    merged_list %>% 
                      filter(is.na(scientificName)) %>%
                      select(group) %>% 
                      left_join(. , raw_metadata_taxon_list, by = 'group'))) %>% 
  mutate(group = str_trim(group)) %>% 
  mutate(group = str_to_title(group)) %>% 
  # lower-case connecting words only when they are whole words
  mutate(group = str_replace_all(group, '\\bAnd\\b', 'and')) %>% 
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>% 
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  # fixed(): the '.' is a literal full stop, not a regex wildcard
  mutate(group = str_replace_all(group, fixed('Et Al.'), 'et al.')) %>%
  # The fallback taxonomy comes straight from the metadata, so the
  # 'chordata' capitalisation has to be fixed here as well.
  mutate(phylum = ifelse(phylum == 'chordata', 'Chordata', phylum)) %>% 
  distinct(group, kingdom, phylum, class, order, family, rank) %>% 
  arrange(kingdom, phylum, class, order) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
group kingdom phylum class order family rank
Earthworms Animalia Anelida Clitellata Opisthopora
Oligochaeta Animalia Anelida Clitellata
Onychophora Animalia Animalia
Branchiobdellida Animalia Annelida Clitellata Branchiobdellida ORDER
Hirudinea Animalia Annelida Clitellata
Worms Animalia Annelida Clitellata
Leech Animalia Annelida Clitellata
Leeches Animalia Annelida Clitellata
Echiurida Animalia Annelida Echiura Echiuroidea
Sipunculids Animalia Annelida Sipuncula
Amblypygi Animalia Arthropoda Arachnida Amblypygi ORDER
Spiders Animalia Arthropoda Arachnida Araneae
Uropodina Animalia Arthropoda Arachnida Mesostigmata
Opiliones Animalia Arthropoda Arachnida Opiliones ORDER
Opilioness Animalia Arthropoda Arachnida Opiliones
Pseudoscorpiones Animalia Arthropoda Arachnida Pseudoscorpiones ORDER
False Scorpions Animalia Arthropoda Arachnida Pseudoscorpions
Scorpions Animalia Arthropoda Arachnida Scorpionida
Arachnida Animalia Arthropoda Arachnida CLASS
Anostraca Animalia Arthropoda Branchiopoda Anostraca ORDER
Branchiopoda Animalia Arthropoda Branchiopoda CLASS
Centipedes Animalia Arthropoda Chilopoda
Chilopoda Animalia Arthropoda Chilopoda CLASS
Collembola Animalia Arthropoda Collembola CLASS
Entomostraca Animalia Arthropoda Copepoda
Copepoda Animalia Arthropoda Copepoda CLASS
Millipedes Animalia Arthropoda Diplopoda
Diplopoda Animalia Arthropoda Diplopoda CLASS
Archaeognatha Animalia Arthropoda Insecta Archaeognatha ORDER
Blattodea Animalia Arthropoda Insecta Blattodea ORDER
Wood Cockroaches Animalia Arthropoda Insecta Blattodea
Cockroaches Animalia Arthropoda Insecta Blattodea
Hydraenidae Animalia Arthropoda Insecta Coleoptera Hydraenidae FAMILY
Coleoptera Animalia Arthropoda Insecta Coleoptera ORDER
Carabidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Saproxylic Beetles Animalia Arthropoda Insecta Coleoptera
Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn and Scarab Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn Beetles Animalia Arthropoda Insecta Coleoptera
Scarabaeidae Animalia Arthropoda Insecta Coleoptera Scarabaeidae FAMILY
Ladybugs Animalia Arthropoda Insecta Coleoptera
Water Beetles Animalia Arthropoda Insecta Coleoptera
Tenebrionidae Animalia Arthropoda Insecta Coleoptera Tenebrionidae FAMILY
Soldier Beetles Animalia Arthropoda Insecta Coleoptera
Leaf Beetles Animalia Arthropoda Insecta Coleoptera
Histeridae Animalia Arthropoda Insecta Coleoptera Histeridae FAMILY
Sphaeritidae Animalia Arthropoda Insecta Coleoptera Sphaeritidae FAMILY
Derodontidoidea Animalia Arthropoda Insecta Coleoptera
Bostrichoidea Animalia Arthropoda Insecta Coleoptera
Staphylinidae Animalia Arthropoda Insecta Coleoptera Staphylinidae FAMILY
Lucanidae Animalia Arthropoda Insecta Coleoptera Lucanidae FAMILY
Geotrupidae Animalia Arthropoda Insecta Coleoptera Geotrupidae FAMILY
Trogidae Animalia Arthropoda Insecta Coleoptera Trogidae FAMILY
Silphidae Animalia Arthropoda Insecta Coleoptera Silphidae FAMILY
Chrysomelidae Animalia Arthropoda Insecta Coleoptera Chrysomelidae FAMILY
Bark Beetles Animalia Arthropoda Insecta Coleoptera
Ground Beetles Animalia Arthropoda Insecta Coleoptera
Curculionidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Powderpost Beetles Animalia Arthropoda Insecta Coleoptera
Bostrichidae Animalia Arthropoda Insecta Coleoptera Bostrichidae FAMILY
Anobiidae Animalia Arthropoda Insecta Coleoptera Anobiidae FAMILY
Ptinidae Animalia Arthropoda Insecta Coleoptera Ptinidae FAMILY
Deadwood Beetle Animalia Arthropoda Insecta Coleoptera
Buprestidae Animalia Arthropoda Insecta Coleoptera Buprestidae FAMILY
Snout Beetles Animalia Arthropoda Insecta Coleoptera
Staphylinoidea Animalia Arthropoda Insecta Coleoptera
Cucujoidea Animalia Arthropoda Insecta Coleoptera
Lamellicornia Animalia Arthropoda Insecta Coleoptera
Seed Beetles Animalia Arthropoda Insecta Coleoptera
Weevils Animalia Arthropoda Insecta Coleoptera
Anthribidae Animalia Arthropoda Insecta Coleoptera Anthribidae FAMILY
Platypodidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Ground Beetle Animalia Arthropoda Insecta Coleoptera
Tiger Beetles Animalia Arthropoda Insecta Coleoptera
Megalopodidae Animalia Arthropoda Insecta Coleoptera Megalopodidae FAMILY
Scarabaeoidea Animalia Arthropoda Insecta Coleoptera
Cerambycidae Animalia Arthropoda Insecta Coleoptera Cerambycidae FAMILY
Curculionoidea Animalia Arthropoda Insecta Coleoptera
Cleroidea Animalia Arthropoda Insecta Coleoptera
Elateridae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Lymexyloidea Animalia Arthropoda Insecta Coleoptera
Cicindelidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Lucanoidea Animalia Arthropoda Insecta Coleoptera
Hydrophilidae Animalia Arthropoda Insecta Coleoptera Hydrophilidae FAMILY
Platypsyllinae Animalia Arthropoda Insecta Coleoptera
Cholevinae Animalia Arthropoda Insecta Coleoptera
Malachiidae Animalia Arthropoda Insecta Coleoptera Malachiidae FAMILY
Melyridae Animalia Arthropoda Insecta Coleoptera Melyridae FAMILY
Phloeophilidae Animalia Arthropoda Insecta Coleoptera Phloiophilidae FAMILY
Cleridae Animalia Arthropoda Insecta Coleoptera Cleridae FAMILY
Cerophytidae Animalia Arthropoda Insecta Coleoptera Cerophytidae FAMILY
Eucnemidae Animalia Arthropoda Insecta Coleoptera Eucnemidae FAMILY
Cryptophagidae Animalia Arthropoda Insecta Coleoptera Cryptophagidae FAMILY
Latridiidae Animalia Arthropoda Insecta Coleoptera Latridiidae FAMILY
Mycetophagidae Animalia Arthropoda Insecta Coleoptera Mycetophagidae FAMILY
Zopheridae Animalia Arthropoda Insecta Coleoptera Zopheridae FAMILY
Monotomidae Animalia Arthropoda Insecta Coleoptera Monotomidae FAMILY
Phalacridae Animalia Arthropoda Insecta Coleoptera Phalacridae FAMILY
Pyrochroide Animalia Arthropoda Insecta Coleoptera
Meloidae Animalia Arthropoda Insecta Coleoptera
Orsodacnidae Animalia Arthropoda Insecta Coleoptera Orsodacnidae FAMILY
Donaciinae Animalia Arthropoda Insecta Coleoptera
Leptinidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Lissomidae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Derodontoidae Animalia Arthropoda Insecta Coleoptera
Leiodidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Oedemeridae Animalia Arthropoda Insecta Coleoptera Oedemeridae FAMILY
Melandryidae Animalia Arthropoda Insecta Coleoptera Melandryidae FAMILY
Dung Beetles Animalia Arthropoda Insecta Coleoptera
Earwigs Animalia Arthropoda Insecta Dermaptera
Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae GENUS
Acalyptratae Animalia Arthropoda Insecta Diptera
Calypterate Animalia Arthropoda Insecta Diptera
Larger Brachycera Animalia Arthropoda Insecta Diptera
Dolichopodid Animalia Arthropoda Insecta Diptera
Lonchopteridae Animalia Arthropoda Insecta Diptera Lonchopteridae FAMILY
Platypezidae Animalia Arthropoda Insecta Diptera Platypezidae FAMILY
Opetiidae Animalia Arthropoda Insecta Diptera Opetiidae FAMILY
Hoverflies Animalia Arthropoda Insecta Diptera
Chaoboridae Animalia Arthropoda Insecta Diptera Chaoboridae FAMILY
Thaumaleidae Animalia Arthropoda Insecta Diptera Thaumaleidae FAMILY
Ceratopogonidae Animalia Arthropoda Insecta Diptera Ceratopogonidae FAMILY
Flies Animalia Arthropoda Insecta Diptera
Asilidae Animalia Arthropoda Insecta Diptera Asilidae FAMILY
Psychodidae Animalia Arthropoda Insecta Diptera Psychodidae FAMILY
Dixidae Animalia Arthropoda Insecta Diptera Dixidae FAMILY
Soldier Flies Animalia Arthropoda Insecta Diptera
Horse-Flies Animalia Arthropoda Insecta Diptera
Bee Flies Animalia Arthropoda Insecta Diptera
Empididae Animalia Arthropoda Insecta Diptera Empididae FAMILY
Conopidae Animalia Arthropoda Insecta Diptera Conopidae FAMILY
Chironomidae Animalia Arthropoda Insecta Diptera Chironomidae FAMILY
Dolichopodidae Animalia Arthropoda Insecta Diptera Dolichopodidae FAMILY
Black Flies Animalia Arthropoda Insecta Diptera
Long-Legged Flies Animalia Arthropoda Insecta Diptera
Micropezidae Animalia Arthropoda Insecta Diptera Micropezidae FAMILY
Grass Flies Animalia Arthropoda Insecta Diptera
Tachinidae Animalia Arthropoda Insecta Diptera Tachinidae FAMILY
Aquatic Empididae Animalia Arthropoda Insecta Diptera
Pediciidae Animalia Arthropoda Insecta Diptera Pediciidae FAMILY
Limoniidae Animalia Arthropoda Insecta Diptera Limoniidae FAMILY
Diptera Animalia Arthropoda Insecta Diptera
Parasitic Diptera Animalia Arthropoda Insecta Diptera
Mayflies Animalia Arthropoda Insecta Ephemeroptera
Auchenorrhyncha Animalia Arthropoda Insecta Hemiptera
Cicadas Animalia Arthropoda Insecta Hemiptera
Shieldbugs Animalia Arthropoda Insecta Hemiptera
Clavicornia Animalia Arthropoda Insecta Hemiptera Aradidae GENUS
Big-Eyed Bugs Animalia Arthropoda Insecta Hemiptera
Nepomorpha Animalia Arthropoda Insecta Hemiptera
Fulgoromorpha Animalia Arthropoda Insecta Hemiptera
Cicadomorpha Animalia Arthropoda Insecta Hemiptera Palaeontinidae GENUS
Scale Insect Animalia Arthropoda Insecta Hemiptera
Hemiptera Animalia Arthropoda Insecta Hemiptera ORDER
Ants Animalia Arthropoda Insecta Hymenoptera
Bombus Spp. Animalia Arthropoda Insecta Hymenoptera
Hymenoptera Animalia Arthropoda Insecta Hymenoptera ORDER
Bees Animalia Arthropoda Insecta Hymenoptera
Diversicornia Animalia Arthropoda Insecta Hymenoptera Encyrtidae GENUS
Sawflies Animalia Arthropoda Insecta Hymenoptera
Spheciformes Animalia Arthropoda Insecta Hymenoptera
Pompilidae Animalia Arthropoda Insecta Hymenoptera Pompilidae FAMILY
Chrysididae Animalia Arthropoda Insecta Hymenoptera Chrysididae FAMILY
Scolioidea Animalia Arthropoda Insecta Hymenoptera
Cuckoo Wasp Animalia Arthropoda Insecta Hymenoptera
Wasps Animalia Arthropoda Insecta Hymenoptera
Sphecidae Animalia Arthropoda Insecta Hymenoptera Sphecidae FAMILY
Wild Bees Animalia Arthropoda Insecta Hymenoptera
Scoliidae Animalia Arthropoda Insecta Hymenoptera Scoliidae FAMILY
Crabronidae et al. Animalia Arthropoda Insecta Hymenoptera
Chrysididae et al. Animalia Arthropoda Insecta Hymenoptera
Symphyta Animalia Arthropoda Insecta Hymenoptera
Stinging Wasps Animalia Arthropoda Insecta Hymenoptera
Mutillidae Animalia Arthropoda Insecta Hymenoptera Mutillidae FAMILY
Sapygidae Animalia Arthropoda Insecta Hymenoptera Sapygidae FAMILY
Tiphiidae Animalia Arthropoda Insecta Hymenoptera Tiphiidae FAMILY
Cimbicidae Animalia Arthropoda Insecta Hymenoptera Cimbicidae FAMILY
Siricidae Animalia Arthropoda Insecta Hymenoptera Siricidae FAMILY
Xiphydriidae Animalia Arthropoda Insecta Hymenoptera Xiphydriidae FAMILY
Ampulicidae Animalia Arthropoda Insecta Hymenoptera Ampulicidae FAMILY
Crabronidae Animalia Arthropoda Insecta Hymenoptera Crabronidae FAMILY
Apoidea Animalia Arthropoda Insecta Hymenoptera
Lepidoptera Animalia Arthropoda Insecta Lepidoptera ORDER
Moths Animalia Arthropoda Insecta Lepidoptera
Butterflies Animalia Arthropoda Insecta Lepidoptera
Papilionoidea Animalia Arthropoda Insecta Lepidoptera
Hesperioidea Animalia Arthropoda Insecta Lepidoptera
Noctuidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Night Butterflies Animalia Arthropoda Insecta Lepidoptera
Owlet Moths Animalia Arthropoda Insecta Lepidoptera
Lymantriinae Animalia Arthropoda Insecta Lepidoptera
Short-Cloaked Moth Animalia Arthropoda Insecta Lepidoptera
Geometer Moths Animalia Arthropoda Insecta Lepidoptera
Bombyces Animalia Arthropoda Insecta Lepidoptera
Sphinges S.l. Animalia Arthropoda Insecta Lepidoptera
Pyralidae Animalia Arthropoda Insecta Lepidoptera Pyralidae FAMILY
Sphinges Animalia Arthropoda Insecta Lepidoptera
Geometridae Animalia Arthropoda Insecta Lepidoptera Geometridae FAMILY
Makrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Macrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Microlepidoptera Animalia Arthropoda Insecta Lepidoptera
Zygaenidae Animalia Arthropoda Insecta Lepidoptera Zygaenidae FAMILY
Sphingidae Animalia Arthropoda Insecta Lepidoptera Sphingidae FAMILY
Sesiidae Animalia Arthropoda Insecta Lepidoptera Sesiidae FAMILY
Psychidae Animalia Arthropoda Insecta Lepidoptera Psychidae FAMILY
Pterophoridae Animalia Arthropoda Insecta Lepidoptera Pterophoridae FAMILY
Alucitidae Animalia Arthropoda Insecta Lepidoptera Alucitidae FAMILY
Crambidae Animalia Arthropoda Insecta Lepidoptera Crambidae FAMILY
Torticidae Animalia Arthropoda Insecta Lepidoptera
Choreutidae Animalia Arthropoda Insecta Lepidoptera Choreutidae FAMILY
Hawk Moths Animalia Arthropoda Insecta Lepidoptera
Bombycidae Animalia Arthropoda Insecta Lepidoptera Bombycidae FAMILY
Pantheidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Nolidae Animalia Arthropoda Insecta Lepidoptera Nolidae FAMILY
Sessidae Animalia Arthropoda Insecta Lepidoptera
Erebidae Animalia Arthropoda Insecta Lepidoptera Erebidae FAMILY
Mantodea Animalia Arthropoda Insecta Mantodea ORDER
Mecoptera Animalia Arthropoda Insecta Mecoptera ORDER
Scorpionflies Animalia Arthropoda Insecta Mecoptera
Megaloptera Animalia Arthropoda Insecta Megaloptera
Neuroptera Animalia Arthropoda Insecta Neuroptera ORDER
Owlflies Animalia Arthropoda Insecta Neuroptera
Net-Winged Insects Animalia Arthropoda Insecta Neuroptera
Odonata Animalia Arthropoda Insecta Odonata ORDER
Orthoptera Animalia Arthropoda Insecta Orthoptera ORDER
Grasshoppers Animalia Arthropoda Insecta Orthoptera
Ensifera Animalia Arthropoda Insecta Orthoptera
Caelifera Animalia Arthropoda Insecta Orthoptera
Crickets Animalia Arthropoda Insecta Orthoptera
Katydids Animalia Arthropoda Insecta Orthoptera
Stick Insects Animalia Arthropoda Insecta Phasmatodea
Stoneflies Animalia Arthropoda Insecta Plecoptera
Snakeflies Animalia Arthropoda Insecta Raphidioptera
Thrips Animalia Arthropoda Insecta Thysanoptera Thripidae GENUS
Trichoptera Animalia Arthropoda Insecta Trichoptera ORDER
Caddisflies Animalia Arthropoda Insecta Trichoptera
Plecoptera Animalia Arthropoda Insecta CLASS
Insects Animalia Arthropoda Insecta
Aquatic and Semi-Aquatic Bugs Animalia Arthropoda Insecta
Insecta Animalia Arthropoda Insecta CLASS
Water Bugs Animalia Arthropoda Insecta
Woodlice Animalia Arthropoda Isopoda
Amphipoda Animalia Arthropoda Malacostraca Amphipoda ORDER
Niphargidae Animalia Arthropoda Malacostraca Amphipoda Niphargidae FAMILY
Cumacea Animalia Arthropoda Malacostraca Cumacea ORDER
Decapoda Animalia Arthropoda Malacostraca Decapoda ORDER
Crayfishes Animalia Arthropoda Malacostraca Decapoda
Astacoidea Animalia Arthropoda Malacostraca Decapoda
Freshwater Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Isopoda Animalia Arthropoda Malacostraca Isopoda
Malacostraca Animalia Arthropoda Malacostraca CLASS
Mysidacea Animalia Arthropoda Malacostraca CLASS
Barnacles Animalia Arthropoda Thecostraca
Freshwater Crabs Animalia Arthropoda Decapoda
Horseshoe Crabs Animalia Arthropoda Xiphosura
Crustaceans Animalia Arthropoda
Arthropods Animalia Arthropoda
Freshwater Crustaceans Animalia Arthropoda
Crayfish Animalia Arthropoda
Sea Spiders Animalia Arthropoda
Marine Crustaceans Animalia Arthropoda
Miscellaneous Arthropods Animalia Arthropoda
Myriapods Animalia Arthropoda
Brachiopods Animalia Brachiopoda
Marine Bryozoans Animalia Bryoza
Bryozoa Animalia Bryozoa PHYLUM
Bony Fishes Animalia Chordata Actinopterygii
Frogs Animalia Chordata Amphibia Anura
Amphibians Animalia Chordata Amphibia
Sea Squirts Animalia Chordata Ascidiacea
Anatidae Animalia Chordata Aves Anseriformes Anatidae FAMILY
Galliformes Animalia Chordata Aves Galliformes ORDER
Birds Animalia Chordata Aves
Breeding Birds Animalia Chordata Aves
Wintering Birds Animalia Chordata Aves
Transient Birds Animalia Chordata Aves
Birds Terre Adelie Animalia Chordata Aves
Birds Terres Australes Animalia Chordata Aves
Birds Scattered Islands Animalia Chordata Aves
Migratory Birds Animalia Chordata Aves
Metropolitan Birds Animalia Chordata Aves
Endemic Brids Animalia Chordata Aves
Breeding Birds of Prey Animalia Chordata Aves
Endangered Birds Animalia Chordata Aves
Birds Ecuador Animalia Chordata Aves
Birds Galapagos Animalia Chordata Aves
Birds of Prey Animalia Chordata Aves
Breeding Raptors Animalia Chordata Aves
Freshwater Lamprey Animalia Chordata Cephalaspidomorphi
Sharks Animalia Chordata Chondrichthyes
Chondrichthyes Animalia Chordata Chondrichthyes
Lamprey Animalia Chordata Hyperoartia
Cetaceans Animalia Chordata Mammalia Artiodactyla
Bats Animalia Chordata Mammalia Chiroptera
Perissodactyla Animalia Chordata Mammalia Perissodactyla ORDER
Primates Animalia Chordata Mammalia Primates ORDER
Lemurs Animalia Chordata Mammalia Primates
Rodents Animalia Chordata Mammalia Rodentia
Mammals Animalia Chordata Mammalia
Terrestrial Mammals Animalia Chordata Mammalia
Marine Mammals Animalia Chordata Mammalia
Mammals Scattered Islands Animalia Chordata Mammalia
Terrestial Mammals Animalia Chordata Mammalia
Aquatic Mammals Animalia Chordata Mammalia
Metropolitan Mammals Animalia Chordata Mammalia
Endemic Mammals Animalia Chordata Mammalia
Large Mammals Animalia Chordata Mammalia
Insectivores Animalia Chordata Mammalia
Carnivores Animalia Chordata Mammalia
Marine Cetartiodactyla Animalia Chordata Mammalia
Terrestrial Cetartiodactyla Animalia Chordata Mammalia
Proboscidea & Sirenia Animalia Chordata Mammalia
Endangered Mammals Animalia Chordata Mammalia
Ungulates Animalia Chordata Mammalia
Land Mammals Animalia Chordata Mammalia
Caimans Animalia Chordata Reptilia Crocodilia
Chameleons Animalia Chordata Reptilia Squamata
Lizards and Worm-Lizards Animalia Chordata Reptilia Squamata
Snakes Animalia Chordata Reptilia Squamata
Marine Turtles Animalia Chordata Reptilia Testudines
Turtles Animalia Chordata Reptilia Testudines
Sea Turtles Animalia Chordata Reptilia Testudines
Reptiles Animalia Chordata Reptilia
Terrestrial Reptiles Animalia Chordata Reptilia
Endemic Lizards Animalia Chordata Reptilia
Endemic Reptiles Animalia Chordata Reptilia
Fishes Animalia Chordata
Lampreys Animalia Chordata
Freshwater Fishes Animalia Chordata
Marine Fishes Animalia Chordata
Tunicata Animalia Chordata
Lancelets Animalia Chordata
Reef Fishes Animalia Chordata
Terrestrial Vertebrates Animalia Chordata
Freshwater and Migratory Fishes Animalia Chordata
Cyclostomata Animalia Chordata
Endangered Vertebrates Animalia Chordata
Endemic Freshwater Fishes Animalia Chordata
Linefishes Animalia Chordata
Brackish and Freshwater Fishes Animalia Chordata
Corals Animalia Cnidaria
Cnidaria Animalia Cnidaria PHYLUM
Reef Corals Animalia Cnidaria
Marine Cnidaria Animalia Cnidaria
Echinoderms Animalia Echinodermata
Acorn Worms Animalia Hemichordata Enteropneusta
Bivalvia Animalia Mollusca Bivalvia CLASS
Marine Bivalves Animalia Mollusca Bivalvia
Mussels Animalia Mollusca Bivalvia
Musslels Animalia Mollusca Bivalvia
Cephalopods Animalia Mollusca Cephalopoda
Gastropoda Animalia Mollusca Gastropoda CLASS
Snails Animalia Mollusca Gastropoda
Marine Snails Animalia Mollusca Gastropoda
Terrestrial Gastropods Animalia Mollusca Gastropoda
Freshwater Gastropods Animalia Mollusca Gastropoda
Mollusca Animalia Mollusca PHYLUM
Molluscs Animalia Mollusca
Terrestrial Molluscs Animalia Mollusca
Non-Marine Molluscs Animalia Mollusca
Inland Molluscs Animalia Mollusca
Species-Poor Groups of Marine Molluscs Animalia Mollusca
Freswater Mollusc Animalia Mollusca
Extramarine Molluscs Animalia Mollusca
Nematoda Animalia Nematoda PHYLUM
Ribbon Worms Animalia Nemertea
Flatworms Animalia Platyhelminthes Turbellaria
Porifera Animalia Porifera PHYLUM
Sea Sponges Animalia Porifera
Vertebrates Animalia chordata
Fauna Animalia
Cave Fauna Animalia
Polychaeta Animalia KINGDOM
Invertebrates Animalia
Fauna_en_higher Animalia
Fauna_nt_lc_dd Animalia
Fauna_en_vu Animalia
Terrestrial Invertebrates Animalia
Aquatic Invertebrates Animalia
Fauna_flagship Species Animalia
Endemic Fauna Animalia
Protected Animals Animalia
Endangered Fauna Animalia
Selected Species Animalia
Marine Species Animalia
Endangered Species Animalia
Marine Invertebrates Animalia
Freshwater Plants Animalia
Flora Visiting Fauna Animalia
Other Invertebrates Animalia
Other Marine Invertebrates Animalia
Marine Fauna Animalia
Endemic Animals Animalia
Vaucheriaceae Chromista Ochrophyta Xanthophyceae Vaucheriales Vaucheriaceae FAMILY
Ascomycota Fungi Ascomycota PHYLUM
Agaricales Fungi Basidiomycota Agaricomycetes Agaricales ORDER
Boletaceae Fungi Basidiomycota Agaricomycetes Boletales Boletaceae FAMILY
Boletales Fungi Basidiomycota Agaricomycetes Boletales ORDER
Russulales Fungi Basidiomycota Agaricomycetes Russulales ORDER
Ustilaginales Fungi Basidiomycota Ustilaginomycetes Ustilaginales ORDER
Basidiomycota Fungi Basidiomycota PHYLUM
Fungi Fungi KINGDOM
Macromycetes Fungi
Mushrooms Fungi
Macrofungi Fungi
Phytoparasitic Small Fungi Fungi
Large Mushrooms Fungi
Lichenicolous Fungus Fungi
Ascomycetes Fungi
Aphyllophorales Fungi
Phytoparasitic Microfungi Fungi
Characeae Plantae Charophyta Charophyceae Charales Characeae FAMILY
Charophyceae Plantae Charophyta Charophyceae CLASS
Desmidiales Plantae Charophyta Conjugatophyceae Desmidiales ORDER
Zygnematophyceae Plantae Charophyta Zygnematophyceae CLASS
Freshwater Diatoms Plantae Gyrista Bacillariophyceae
Marchantiophyta Plantae Marchantiophyta PHYLUM
Apiaceae Plantae Tracheophyta Magnoliopsida Apiales Apiaceae FAMILY
Cactaceae Plantae Tracheophyta Magnoliopsida Caryophyllales Cactaceae FAMILY
Magnoliaceae Plantae Tracheophyta Magnoliopsida Magnoliales Magnoliaceae FAMILY
Anisoptera Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae GENUS
Dipterocarpaceae Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae FAMILY
Magnoliophyta Plantae Tracheophyta PHYLUM
Charophytes Plantae Charophyceae Charales
Lycopods Plantae Lycopodiopsida Lycopodiales
Tree Ferns Plantae Polypodiopsida
Sphagnum Mosses Plantae Sphagnopsida
Orchids Plantae Asparagales
Wild Cinnamon Plantae Laurales
Bromeliads Plantae Poales
Flora Plantae
Bryophytes Plantae
Ferns Plantae
Vascular Plants Plantae
Lichens Plantae
Hydrophytes Plantae
Marine Flora Plantae
Hornworts Plantae
Liverworts Plantae
Mosses Plantae
Flora Saint Paul and Amsterdam Plantae
Flora Scattered Islands Plantae
Flora Kerguelen Plantae
Endemic Flora Plantae
Trees Plantae
Shrubs Plantae
Algae Plantae
Arctic Vascular Plants Plantae
Marine Macroalgae Plantae
Freshwater Red Algae Plantae
Freshwater Brown Algae Plantae
Flowering Plant Plantae
Red Algae Plantae
Brown Algae Plantae
Hepaticophyta Plantae
Broad-Leaved Mosses Plantae
Lichen Communities Plantae
Flora of Cerrado Biom Plantae
Endemic Plants Plantae
Flora On the Red List Plantae
Protected Plants Plantae
Endangered Plants Plantae
Near-Endemic Flora Plantae
Perennial Shrubs Plantae
Flora_2 Plantae
Endemic and Range-Restricted Vascular Plantss Plantae
Indigenous Plants Plantae
Selected Species In Marshlands Plantae
Conifers Plantae
Peninsular Planrs Plantae
Lycophytes Plantae
Higher Plants Plantae
Cloud Forest Trees Plantae
Spermatophytes Plantae
Palms Plantae
Wild Crop Relatives Plantae
Aquatic Plants Plantae
Medicinal Plants Plantae
Dry Forest Trees Plantae
Monocotyledons Plantae
Freshwater Flora Plantae
Flora List Plantae
Endemic Trees Plantae
Myxomycetes Protozoa Mycetozoa Myxomycetes CLASS
Protozoa Protozoa KINGDOM
Zygoptera Protozoa GENUS

FIX

# Preview of the manual taxonomic corrections applied to `raw_metadata`.
# The result is printed, not assigned.
raw_metadata %>% 
  # umbrella groups: set the kingdom and clear any phylum
  mutate(kingdom = ifelse(group == 'Flora', 'Plantae', kingdom)) %>% 
  mutate(kingdom = ifelse(group == 'Fauna', 'Animalia', kingdom)) %>%  
  mutate(phylum = ifelse(group == 'Flora' | group == 'Fauna', NA, phylum)) %>% 
  mutate(phylum = ifelse(group == 'Onychophora', 'Onychophora', phylum)) %>% 
  # typo in the group label
  mutate(group = ifelse(group == 'Opilioness', 'Opiliones', group)) %>% 
  # False scorpions: the order must be set BEFORE the group is renamed,
  # otherwise the renamed group no longer matches and the order stays empty.
  # The comparison ignores case so both 'False scorpions' and
  # 'False Scorpions' are caught.
  mutate(order = ifelse(str_to_lower(group) == 'false scorpions', 
                        'Pseudoscorpiones', order)) %>%
  mutate(group = ifelse(str_to_lower(group) == 'false scorpions', 
                        'Pseudoscorpiones', group)) %>%
  # class-level corrections
  mutate(class = ifelse(group == 'Entomostraca', NA, class)) %>%
  mutate(class = ifelse(group == 'Horseshoe Crabs', 'Merostomata', class)) %>%
  mutate(class = ifelse(group == 'Freshwater Crabs', 'Malacostraca', class)) %>%
  mutate(class = ifelse(group == 'Sharks', 'Chondrichthyes', class)) %>%
  # Kenyan (iso_2 == 'KE') mammal groups: fill in the order ...
  mutate(order = ifelse(iso_2 == 'KE' & group == 'Carnivores', 'Carnivora', order),
         order = ifelse(iso_2 == 'KE' & group == 'Perissodactyla', 'Perissodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Marine Cetartiodactyla', 'Artiodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Terrestrial Cetartiodactyla', 'Artiodactyla', order)) %>%
  # ... and only then replace the Cetartiodactyla labels (the order rules
  # above match on the old labels)
  mutate(group = ifelse(group == 'Marine Cetartiodactyla', 'Cetaceans', group),
         group = ifelse(group == 'Terrestrial Cetartiodactyla', 'Ungulates', group)) %>% 
  mutate(order = ifelse(group == 'Endemic Lizards', 'Squamata', order)) %>%
  mutate(class = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzonti', class),
         order = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzontiformes', order)) %>%
  # typo in the group label
  mutate(group = ifelse(group == 'Musslels', 'Mussels', group)) %>% 
  # plant groups identified by keyword in the group label
  mutate(phylum = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Asparagales', order)) %>%
  mutate(phylum = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Liliopsida', class)) %>%
  mutate(phylum = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Magnoliopsida', class),
         order = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Canellales', order)) %>%
  mutate(phylum = ifelse(grepl('tree|shrub|vascular|angio|spermato|flower', group, ignore.case = TRUE), 
                         'Tracheophyta', phylum)) %>% 
  mutate(phylum = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Polypodiopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('conif', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('conif', group, ignore.case = TRUE), 'Pinopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('palm', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('palm', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('palm', group, ignore.case = TRUE), 'Arecales', order)) %>%
  # Monocotyledons: 'Tracheophyta' is the phylum; the class is 'Liliopsida',
  # as for the orchid / bromeliad / palm rules above
  mutate(phylum = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Liliopsida', class)) 

Check Event fields

The field is: year

  • Check it has numeric values.
# List the sources whose `year` is still missing once the placeholders are
# removed and the column is coerced to numeric (one row per source name).
raw_metadata %>% 
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  # placeholder / doubtful values become real NAs
  mutate(year = ifelse(year %in% c('NA', '2024?'), NA, year)) %>%
  # `%in%` instead of `==`: with `==` a missing name_orig makes the test NA,
  # and ifelse() would then wipe the year of every unnamed source
  mutate(year = ifelse(name_orig %in% 'The Red List of Mammals of South Africa, Swaziland and Lesotho 2024', 
                       2024, year)) %>%
  # anything that is still not a number turns into NA here (with a warning)
  mutate(year = as.numeric(year)) %>% 
  filter(is.na(year)) %>% 
  select(year, name_orig) %>% 
  distinct(name_orig, .keep_all = TRUE) 

DOUBTS

https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/
A Red List of Benin’s sharks                                           
The Red List of Mammals of South Africa, Swaziland and Lesotho 2024  
1 Crveni popis hrvatskih koralja                                                        
2 Crveni popis lišajeva Hrvatske                                                        
3 Červené seznamy                                                                       
4 Coleoptera (Beetle) – Invertebrate Ireland Online                                     
5 Tricoptera (Caddisfly) – InvertebrateIreland Online                                   
6 Les mammifères de la Côte d’Ivoire                                                    
7 Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation
8 Красная книга Азербайджанской Республики                                              
9 Красная книга Узбекистана  

Check if URLs are working

# URLs that do not contain 'http' (missing values included)
raw_metadata %>% 
  distinct(url_clean) %>% 
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       
# URL error
# raw_metadata %>% filter(grepl('http', url_clean)) %>% 
#   distinct(url_clean) %>% 
#   mutate(check_URL = ifelse(map(URLencode(url_clean), http_error), 'not found', 'OK')) %>% 
#   filter(check_URL == 'not found') 

# Distinct url_clean values with no 'http' in them
raw_metadata %>% 
  distinct(url_clean) %>% 
  filter(grepl('http', url_clean) == FALSE)
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       
# Blank out the url_clean entries that mention 'Nicolau', then list the
# remaining values that do not contain 'http'
raw_metadata %>% 
  mutate(url_clean = replace(url_clean, grepl('Nicolau', url_clean), NA)) %>% 
  distinct(url_clean) %>% 
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       

DOUBTS

ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
Nicolau, J. i Dalmau, J., 2008. Llista Vermella\r\ndels Vertebrats d’Andorra. BIOCOM (Biologia i\r\nComunicació) SL i Departament de Patrimoni\r\nNatural del Govern d’Andorra. Informe inèdit

Run code and keep relevant fields

# Build the cleaned `metadata` table: re-attach the taxonomy to every raw
# record, apply the manual taxonomic corrections, tidy the source / location /
# event / URL fields and keep only the columns that are published.
metadata <- 
  # check taxon: drop the raw taxonomic columns and join the curated ones
  # (`merged_list` and `raw_metadata_taxon_list` are defined outside this chunk)
  left_join(raw_metadata %>% mutate(group = str_trim(group)) %>%
              select(-c(kingdom,phylum,subphylum,class,order)),
            bind_rows(merged_list %>% filter(!is.na(scientificName)),
                      merged_list %>% filter(is.na(scientificName)) %>%
                        select(group) %>%
                        left_join(. , raw_metadata_taxon_list))) %>% 
  # title-case the group label, keeping the small words in lower case
  mutate(group = str_trim(group)) %>% 
  mutate(group = str_to_title(group)) %>% 
  mutate(group = str_replace_all(group, 'And ', 'and ')) %>% 
  # word boundaries: only the whole words 'Of' / 'The' are lower-cased, so a
  # name that merely starts with those letters (e.g. 'Thecostraca') keeps its
  # capital
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>% 
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  # more taxonomic corrections
  # 'Flora Visiting Fauna' contains both keywords; it is listed under Animalia
  # in the taxon table, so it is handled before the generic 'flora' rule
  mutate(kingdom = case_when(group == 'Flora Visiting Fauna' ~ 'Animalia',
                             grepl('flora', group, ignore.case = TRUE) ~ 'Plantae',
                             grepl('fauna', group, ignore.case = TRUE) ~ 'Animalia',
                             grepl('tunicata', group, ignore.case = TRUE) ~ NA,
                             .default = kingdom)) %>% 
  mutate(phylum = ifelse(phylum == 'chordata', 'Chordata', phylum)) %>% 
  mutate(kingdom = ifelse(group == 'Flora', 'Plantae', kingdom)) %>% 
  mutate(kingdom = ifelse(group == 'Fauna', 'Animalia', kingdom)) %>%  
  mutate(phylum = ifelse(group == 'Flora' | group == 'Fauna', NA, phylum)) %>% 
  mutate(phylum = ifelse(group == 'Onychophora', 'Onychophora', phylum)) %>% 
  mutate(group = ifelse(group == 'Opilioness', 'Opiliones', group)) %>% 
  # False scorpions: group is title-cased by now ('False Scorpions'), so the
  # match ignores case; the order is set BEFORE the group is renamed,
  # otherwise the renamed group would no longer match
  mutate(order = ifelse(str_to_lower(group) == 'false scorpions', 
                        'Pseudoscorpiones', order)) %>%
  mutate(group = ifelse(str_to_lower(group) == 'false scorpions', 
                        'Pseudoscorpiones', group)) %>%
  mutate(class = ifelse(group == 'Entomostraca', NA, class)) %>%
  mutate(class = ifelse(group == 'Horseshoe Crabs', 'Merostomata', class)) %>%
  mutate(class = ifelse(group == 'Freshwater Crabs', 'Malacostraca', class)) %>%
  mutate(class = ifelse(group == 'Sharks', 'Chondrichthyes', class)) %>%
  # Kenyan (iso_2 == 'KE') mammal groups: fill in the order ...
  mutate(order = ifelse(iso_2 == 'KE' & group == 'Carnivores', 'Carnivora', order),
         order = ifelse(iso_2 == 'KE' & group == 'Perissodactyla', 'Perissodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Marine Cetartiodactyla', 'Artiodactyla', order),
         order = ifelse(iso_2 == 'KE' & group == 'Terrestrial Cetartiodactyla', 'Artiodactyla', order)) %>%
  # ... and only then replace the Cetartiodactyla labels
  mutate(group = ifelse(group == 'Marine Cetartiodactyla', 'Cetaceans', group),
         group = ifelse(group == 'Terrestrial Cetartiodactyla', 'Ungulates', group)) %>% 
  mutate(order = ifelse(group == 'Endemic Lizards', 'Squamata', order)) %>%
  mutate(class = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzonti', class),
         order = ifelse(grepl('lamprey', group, ignore.case = TRUE), 'Petromyzontiformes', order)) %>%
  mutate(group = ifelse(group == 'Musslels', 'Mussels', group)) %>% 
  # plant groups identified by keyword in the group label
  mutate(phylum = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('orchid', group, ignore.case = TRUE), 'Asparagales', order)) %>%
  mutate(phylum = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('bromeli', group, ignore.case = TRUE), 'Liliopsida', class)) %>%
  mutate(phylum = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Magnoliopsida', class),
         order = ifelse(grepl('cinnamon', group, ignore.case = TRUE), 'Canellales', order)) %>%
  mutate(phylum = ifelse(grepl('tree|shrub|vascular|angio|spermato|flower', group, ignore.case = TRUE), 
                         'Tracheophyta', phylum)) %>% 
  mutate(phylum = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('ferns', group, ignore.case = TRUE), 'Polypodiopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('conif', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('conif', group, ignore.case = TRUE), 'Pinopsida', class)) %>% 
  mutate(phylum = ifelse(grepl('palm', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('palm', group, ignore.case = TRUE), 'Liliopsida', class),
         order = ifelse(grepl('palm', group, ignore.case = TRUE), 'Arecales', order)) %>%
  # Monocotyledons: 'Tracheophyta' is the phylum; the class is 'Liliopsida',
  # as for the orchid / bromeliad / palm rules above
  mutate(phylum = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Tracheophyta', phylum),
         class = ifelse(grepl('monocot', group, ignore.case = TRUE), 'Liliopsida', class)) %>% 
  # check columns
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  # check source
  mutate(name_orig = str_squish(name_orig)) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\"")) %>% 
  # missing name of source: the Grand Est records carry a URL in name_orig,
  # so a title is assigned per taxonomic group
  # NOTE(review): 'Fishes' is mapped to the day-butterfly list title and
  # 'Butterflies' to the night-butterfly one; this looks shifted by one
  # group - confirm the intended titles against the source website.
  mutate(name_orig = case_when(grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Birds' ~ 
                                 'Liste rouge des Oiseaux du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Mammals' ~ 
                                 'Liste rouge des Mammifères du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Orthoptera' ~ 
                                 'Liste rouge des Orthoptères du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Fishes' ~ 
                                 'Liste rouge des Papillons de jour (Rhopalocères et Zygènes) du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Butterflies' ~ 
                                 'Liste rouge des Papillons de nuit du Grand Est',
                               # group was title-cased above, so accept both spellings
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & 
                               group %in% c('Night butterflies', 'Night Butterflies') ~ 
                                 'Liste rouge des Hétérocères du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Ladybugs' ~ 
                                 'Liste rouge des Coccinelles du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Branchiopoda' ~ 
                                 'Liste rouge des Branchiopodes du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Heteroptera' ~ 
                                 'Liste rouge des Punaises du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Mayflies' ~ 
                                 'Liste rouge des Ephémères – Trichoptères – Plécoptères du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Caddisflies' ~ 
                                 'Liste rouge des Ephémères – Trichoptères – Plécoptères du Grand Est',
                               grepl('htt', name_orig) & 
                               state_province == 'Grand Est' & group == 'Stoneflies' ~ 
                                 'Liste rouge des Ephémères – Trichoptères – Plécoptères du Grand Est',
                               .default = name_orig)) %>% 
  # check format
  mutate(format = ifelse(format == 'NA', NA, str_squish(format))) %>% 
  mutate(format = ifelse(format == '?', NA, format)) %>% 
  # check language: normalise every '/' or '|' separator to ' | ' (all
  # occurrences, not just the first) and collapse the resulting double spaces,
  # as done for continent and the iso codes below
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>% 
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>% 
  # check location
  mutate(continent = str_squish(str_replace_all(str_squish(continent), '\\|', ' | '))) %>%
  mutate(continent = str_replace_all(continent, '_', ' ')) %>%
  mutate(continent = str_to_title(continent)) %>% 
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>% 
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', NA, str_to_title(country))) %>%
  mutate(country = str_replace_all(country, 'And ', 'and ')) %>% 
  mutate(country = str_replace_all(country, '\\bOf\\b', 'of')) %>% 
  mutate(country = str_replace_all(country, '\\bThe\\b', 'the')) %>%
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>% 
  # 'NA' is a genuine iso_2 code for Namibia, so it is only blanked elsewhere
  mutate(iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2))) %>% 
  mutate(iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3))) %>%  
  mutate(iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | '))) %>%
  mutate(iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))) %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, 'And ', 'and ')) %>% 
  # word boundaries keep region names such as 'Thessaly' or 'Offaly' intact
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bOf\\b', 'of')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bThe\\b', 'the')) %>% 
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>% 
  mutate(region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom))) %>% 
  mutate(region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail))) %>% 
  mutate(region_detail = str_squish(str_replace_all(str_squish(region_detail), '\\|', ' | '))) %>%
  # check event: placeholder / doubtful years become NA
  mutate(year = ifelse(year %in% c('NA', '2024?'), NA, year)) %>%
  # `%in%` instead of `==`: with `==` a missing name_orig makes the test NA,
  # and ifelse() would then wipe the year of every unnamed source
  mutate(year = ifelse(name_orig %in% 'The Red List of Mammals of South Africa, Swaziland and Lesotho 2024', 
                       2024, year)) %>%
  mutate(year = as.numeric(year)) %>% 
  # check urls
  mutate(url_clean = ifelse(url_clean == 'NA', NA, url_clean)) %>% 
  # this entry holds a bibliographic citation instead of a link
  mutate(url_clean = ifelse(grepl('Nicolau', url_clean), NA, url_clean)) %>% 
  # select columns
  select(id, continent, 
         gadm_level_0 = country, gadm_level_1, gadm_level_2,
         region_custom, region_detail, iso_2, iso_3,
         taxa = group, kingdom, phylum, class, order, family, 
         source_name = name_orig, source_link = url_clean,
         language, year) 

# Render a random sample of 50 cleaned records, ordered by id, as a styled table
kableExtra::kable_styling(
  kableExtra::kbl(
    arrange(slice_sample(metadata, n = 50), id),
    booktabs = TRUE
  ),
  latex_options = c('striped', 'hold_position')
)
id continent gadm_level_0 gadm_level_1 gadm_level_2 region_custom region_detail iso_2 iso_3 taxa kingdom phylum class order family source_name source_link language year
39 Europe Austria AT AUT Opiliones Animalia Arthropoda Arachnida Opiliones Rote Listen gefährdeter Tiere Österreichs. Teil 3: Flusskrebse, Köcherfliegen, Skorpione, Weberknechte, Zikaden https://www.umweltbundesamt.at/fileadmin/site/themen/naturschutz/rote_liste_weberknechte_2009.xlsx Deutsch 2009
50 Europe Austria Oberösterreich AT AUT Vertebrates Animalia Chordata Liste der Wirbeltiere Oberösterreichs 5. Fassung https://www.researchgate.net/publication/232095298_Liste_der_Wirbeltiere_Oberosterreichs_5_Fassung_-_Beitr_Naturk_Oberosterreichs_17_5-53 Deutsch 2007
89 Europe Italy Bolzano IT ITA Bats Animalia Chordata Mammalia Chiroptera Die Fledermäuse Tirols https://www.zobodat.at/pdf/Natur-in-Tirol_6_0001-0168.pdf Deutsch 2014
139 Europe Bosnia and Herzegovina BA BIH Flora Plantae CRVENA LISTA UGROŽENIH DIVLJIH VRSTA I PODVRSTA BILJAKA, ŽIVOTINJA I GLJIVA https://faolex.fao.org/docs/pdf/bih204210.pdf Bosnian 2014
219 Europe Slovenia SI SVN Neuroptera Animalia Arthropoda Insecta Neuroptera Rdeči seznam netopteranov (Neuropteroidea) https://www.uradni-list.si/files/RS_-2002-082-04055-OB~P018-0000.PDF Slovenian 2002
259 Europe Italy IT ITA Bees Animalia Arthropoda Insecta Hymenoptera LISTA ROSSA DELLE API ITALIANE MINACCIATE https://www.iucn.it/pdf/Comitato_IUCN_Lista_Rossa_delle_Api_italiane_minacciate.pdf Italian 2018
377 North America Martinique MQ MTQ Longhorn Beetles Animalia Arthropoda Insecta Coleoptera Liste rouge des capricornes (Cerambycidae) de la Martinique (France) https://inpn.mnhn.fr/espece/listerouge/FR/Coleopteres_longicornes_Martinique_2020 French 2020
400 Africa French Southern Territories TF ATF Reptiles Animalia Chordata Reptilia Liste rouge des reptiles des îles Éparses (France) https://inpn.mnhn.fr/espece/listerouge/FR/Reptiles_Eparses_TAAF_2015 French 2015
557 Europe France Franche-Comté FR FRA Insects Animalia Arthropoda Insecta Listes rouges régionales des insectes de Franche-Comté https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Franche-Comt%C3%A9/listerouge2014_insectes_cle0362a7.pdf French 2013
563 Europe France Limousin FR FRA Fishes Animalia Chordata La liste rouge des espèces de poissons du Limousin https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Limousin/LRR_poissons_Limousin_2019.pdf French 2019
846 Europe Romania RO ROU Butterflies Animalia Arthropoda Insecta Lepidoptera Lista rosie a fluturilor din România https://www.researchgate.net/publication/369357922_Lista_rosie_a_fluturilor_din_Romania_Red_List_of_Lepidoptera_of_Romania English 2021
864 Europe Germany Deutschland DE DEU Reptiles Animalia Chordata Reptilia Daten der Roten Liste Kriechtiere 2009 https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html German 2009
897 Europe Germany DE DEU Barnacles Animalia Arthropoda Thecostraca Rote Liste und Artenlisten der bodenlebenden wirbellosen Meerestiere https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html German 2013
919 Europe Germany DE DEU Mantodea Animalia Arthropoda Insecta Mantodea Rote Liste und Gesamtartenliste der Heuschrecken und Fangschrecken (Orthoptera et Mantodea) Deutschlands https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html German 2024
1037 Europe Germany Bayern DE DEU Cucujoidea Animalia Arthropoda Insecta Coleoptera Rote Liste gefährdeter Cucujoidea (Coleoptera: „Clavicornia“) Bayerns https://www.lfu.bayern.de/natur/rote_liste_tiere/2003/index.htm German 2003
1074 Europe Germany Bayern DE DEU Pseudoscorpiones Animalia Arthropoda Arachnida Pseudoscorpiones Rote Liste gefährdeter Pseudoskorpione (Arachnida: Pseudoscorpiones) Bayerns https://www.lfu.bayern.de/natur/rote_liste_tiere/2003/index.htm German 2003
1219 Europe Germany Niedersachsen DE DEU Bryophytes Plantae Referenzliste Moose https://www.nlwkn.niedersachsen.de/artenreferenzlisten/arten-referenzlisten-198326.html German 2022
1255 Europe Germany Bremen DE DEU Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae Rote Liste der in Niedersachsen und Bremen gefährdeten Wanzen https://www.nlwkn.niedersachsen.de/naturschutz/rote-liste-der-in-niedersachsen-und-bremen-gefahrdeten-wanzen-38884.html German 1999
1282 Europe Germany Hamburg DE DEU Amphibians Animalia Chordata Amphibia Verbreitung und Schutz der Amphibien und Reptilien in Hamburg 1981 https://www.hamburg.de/politik-und-verwaltung/behoerden/bukea/amphibien-und-reptilien-932338 German 1981
1293 Europe Germany Hamburg DE DEU Odonata Animalia Arthropoda Insecta Odonata Libellen in Hamburg 2006 https://www.hamburg.de/politik-und-verwaltung/behoerden/bukea/libellen-932358 German 2006
1304 Europe Germany Hamburg DE DEU Breeding Birds Animalia Chordata Aves Rote Liste der Brutvögel in Hamburg 2018 https://www.hamburg.de/politik-und-verwaltung/behoerden/bukea/voegel-932362 German 2018
1408 Europe Germany Nordrhein-Westfalen DE DEU Ground Beetles Animalia Arthropoda Insecta Coleoptera Rote Liste und Artenverzeichnis der Schmetterlinge - Lepidoptera - in Nordrhein-Westfalen https://www.lanuk.nrw.de/themen/natur/artenschutz/rote-liste German 2021
1441 Europe Germany Rheinland-Pfalz DE DEU Ferns Plantae Tracheophyta Polypodiopsida Rote Liste Farn- und Blütenpflanzen in Rheinland-Pfalz https://lfu.rlp.de/natur/artenschutz/rote-listen German 2023
1464 Europe Germany Saarland DE DEU Mayflies Animalia Arthropoda Insecta Ephemeroptera Rote Liste und Gesamtartenliste der Eintagsfliegen (Ephemeroptera) des Saarlandes https://rote-liste-saarland.de/ German 2020
1476 Europe Germany Saarland DE DEU Moths Animalia Arthropoda Insecta Lepidoptera Rote Liste und Gesamtartenliste der Nachtfalter (Lepidoptera p. p.) des Saarlandes - 4,/2. Fassung https://rote-liste-saarland.de/ German 2020
1570 Europe Germany Sachsen-Anhalt DE DEU Cyclostomata Animalia Chordata Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2032 German 2020
1589 Europe Germany Sachsen-Anhalt DE DEU Auchenorrhyncha Animalia Arthropoda Insecta Hemiptera Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2044 German 2020
1591 Europe Germany Sachsen-Anhalt DE DEU Net-Winged Insects Animalia Arthropoda Insecta Neuroptera Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2046 German 2020
1593 Europe Germany Sachsen-Anhalt DE DEU Snakeflies Animalia Arthropoda Insecta Raphidioptera Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2041 German 2020
1613 Europe Germany Sachsen-Anhalt DE DEU Mycetophagidae Animalia Arthropoda Insecta Coleoptera Mycetophagidae Rote Listen Sachsen-Anhalt 2020 https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2045 German 2020
1663 Europe Germany Sachsen-Anhalt DE DEU Chilopoda Animalia Arthropoda Chilopoda Rote Listen Sachsen-Anhalt 2004 https://lau.sachsen-anhalt.de/naturschutz/arten-und-biotopschutz/berichte-lau-heft-39-2004-rote-liste German 2004
1689 Europe Germany Sachsen-Anhalt DE DEU Monotomidae Animalia Arthropoda Insecta Coleoptera Monotomidae Rote Listen Sachsen-Anhalt 2004 https://lau.sachsen-anhalt.de/naturschutz/arten-und-biotopschutz/berichte-lau-heft-39-2004-rote-liste German 2004
1728 Europe Germany Schleswig-Holstein DE DEU Bryophytes Plantae Die Moose Schleswig-Holsteins– Rote Liste https://www.schleswig-holstein.de/DE/fachinhalte/A/artenschutz/as_04_RoteListen German 2002
1882 Europe Kosovo XK KOS Vascular Plants Plantae Tracheophyta Lista e kuqe e florës vaskulare të Republikës së Kosovës https://www.ammk-rks.net/al/lajmi-single/249 German 2015
1885 Europe Liechtenstein LI LIE Vascular Plants Plantae Tracheophyta Rote Liste der gefährdeten und seltenen Gefässpflanzen des Fürstentums Liechtenstein 2006 https://buntundartenreich.at/upload/file/Rote_Liste_der_Gef%C3%A4sspflanzen_des_F%C3%BCrstentums_Liechtenstein_2006.pdf German 2006
1959 South America Brazil São Paulo BR BRA Fauna Animalia Decreto nº 53.494, de 2 de outubro de 2008 https://www.al.sp.gov.br/repositorio/legislacao/decreto/2008/decreto-53494-02.10.2008.html Portuguese 2008
2163 Asia Singapore SG SGP Freshwater Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda THE SINGAPORE RED DATA BOOK 2nd Edition https://www.nparks.gov.sg/nature/species-list English 2008
2361 Europe Belgium Vlaanderen BE BEL Ants Animalia Arthropoda Insecta Hymenoptera Verspreidingsatlas en voorlopige Rode Lijst van de mieren van Vlaanderen https://www.vlaanderen.be/publicaties/verspreidingsatlas-en-voorlopige-rode-lijst-van-de-mieren-van-vlaanderen Dutch 2003
2542 Europe Ukraine Kirovohrad UA UKR Flora Plantae ОФІЦІЙНІ ПЕРЕЛІКИ РЕГІОНАЛЬНО РІДКІСНИХ РОСЛИН АДМІНІСТРАТИВНИХ ТЕРИТОРІЙ УКРАЇНИ https://www.plantarium.ru/page/redbook/id/253.html Ukrainian 2012
2608 North America West Indies Saint Lucia | Trinidad and Tobago | Saint Kitts and Nevis | Grenada | Barbados | Aruba | Antigua and Barbadua | Anguilla | Haiti | Dominican Republic | Cayman Islands | Jamaica | Puerto Rico | Cuba | Turks and Caicos | Bahamas BS | TC | CU | PR | JM | KY | DM | DO | HT | AI | AG | AW | BB | GD | KN | LC | TT BHS | TCA | CUB | PRI | JAM | CYM | DMA | DOM | HTI | AIA | ATG | ABW | BRB | GRD | KNA | LCA | TTO Birds Animalia Chordata Aves Complete Checklist of the Birds of the West Indies https://www.birdscaribbean.org/caribbean-birds/ English 2019
2725 Africa Democratic Republic of the Congo CD COD Trees Plantae Tracheophyta Red List of the endemic and subendemic trees of Central Africa (Democratic Republic of the Congo - Rwanda - Burundi) https://zenodo.org/records/5645166 English 2021
2771 Africa Madagascar MG MDG Freshwater Fishes Animalia Chordata RED LIST ASSESSMENT OF MADAGASCAR’S FRESHWATER FISHES https://portals.iucn.org/library/sites/library/files/documents/Rep-2004-032.pdf English 2004
2788 Africa Morocco MA MAR Flora Plantae Livre rouge de la flore vasculaire du Maroc, huitième fascicule https://www.tela-botanica.org/2016/11/article7745/ French 2018
2813 Europe | Africa | Asia Mediterranean Morocco | Algeria | Tunisia | Libya | Western Sahara | Egypt | Palestine | Israel | Lebanon | Syria | Turkey | Jordan | Bulgaria | Greece | Albania | Montenegro | Croatia | Italy | France | Spain | Bosnia and Herzegovina | Slovenia | Monaco | Portugal | Cyprus | North Macedonia | Kosovo | Malta | Andorra | Serbia | Switzerland MA | DZ | TN | LY | EH | EG | PS | IL | LB | SY | TR | JO | BG | GR | AL | ME | HR | IT | FR | ES | BA | SI | MC | PT | CY | MK | KS | MT | AD | RS | CH MAR | DZA | TUN | LBY | ESH | EGY | PSE | ISR | LBN | SYR | TUR | JOR | BGR | GRC | ALB | MNE | HRV | ITA | FRA | ESP | BIH | SVN | MCO | PRT | CYP | MKD | KOS | MLT | AND | SRB | CHE Odonata Animalia Arthropoda Insecta Odonata The status and distribution of dragonflies of the Mediterranean basin https://portals.iucn.org/library/node/9857 English 2009
2907 Asia Japan JP JPN Bryophytes Plantae レッドリスト2019 https://ikilog.biodic.go.jp/Rdb/booklist Japanese 2019
2946 Asia Japan JP JPN Insects Animalia Arthropoda Insecta レッドリスト2015 https://ikilog.biodic.go.jp/Rdb/booklist Japanese 2015
3123 Asia Russia Kamchatka RU RUS Flora Plantae Красная книга Камчатки. Том 2. Растения, грибы, термофильные микроорганизмы. Петропавловск-Камчатский, 2007. https://www.plantarium.ru/lang/en/page/redbook/id/20.html Russian 2007
3188 Asia Russia Novosibirsk RU RUS Flora Plantae Постановление правительства Новосибирской области от 07.08.2018 № 294-п О внесении изменений в постановление администрации Новосибирской области от 21.07.2008 № 200-па https://www.plantarium.ru/lang/en/page/redbook/id/279.html Russian 2018
3261 Asia Russia Tyumen' RU RUS Flora Plantae Красная книга Тюменской области. Кемерово, 2020. https://www.plantarium.ru/lang/en/page/redbook/id/316.html Russian 2020
3285 Asia Russia Chukot RU RUS Flora Plantae Красная книга Чукотского автономного округа. Редкие и находящиеся под угрозой исчезновения виды растений (покрытосеменные, папоротниковидные, плауновидные, мохообразные, лишайники, грибы), 2008. https://www.plantarium.ru/lang/en/page/redbook/id/70.html Russian 2008

Data audit

Summary

Code
# Headline figures for the audited metadata, transposed into a one-column
# table (column "N") with one row per figure.
metadata %>% 
  summarise(`Number of records` = n(),
            `Number of sources` = n_distinct(source_name),
            `Number of taxa` = n_distinct(taxa),
            # na.rm = TRUE: `kingdom == '...'` is NA for a record with a
            # missing kingdom, and a single NA would turn the whole sum into
            # NA (rendered as an empty cell).
            `Animalia records` = sum(kingdom == 'Animalia', na.rm = TRUE),
            `Plantae records` = sum(kingdom == 'Plantae', na.rm = TRUE),
            `Fungi records` = sum(kingdom == 'Fungi', na.rm = TRUE),
            # A missing GADM value is not a place, so it is not counted.
            Countries = n_distinct(gadm_level_0, na.rm = TRUE),
            `Sub-national territories` = n_distinct(gadm_level_1,
                                                    na.rm = TRUE)) %>% 
  t() %>% `colnames<-`(c("N")) %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
N
Number of records 3200
Number of sources 2091
Number of taxa 483
Animalia records
Plantae records
Fungi records
Countries 170
Sub-national territories 223

Geographic coverage

Code
# Number of distinct sources per continent.
# `continent` may list several continents separated by '|'; each record is
# expanded to one row per continent, so a source spanning several continents
# is counted once under each of them (and therefore more than once in the
# 'Total' row added by adorn_totals()).
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  group_by(continent) %>% 
  # n_distinct() already returns 0 when there is nothing to count, so no
  # special-casing of the empty case is needed.
  summarise(n_sources = n_distinct(source_name, na.rm = TRUE)) %>% 
  arrange(desc(n_sources)) %>% 
  rename(`Number of sources` = n_sources) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
continent Number of sources
Europe 1439
Asia 272
Africa 143
South America 116
North America 112
Oceania 31
Antarctica 7
Total 2120
Code
# Number of records per continent.
# Records listing several continents ('A | B') are expanded to one row per
# continent first, so they contribute to every continent they mention and the
# 'Total' row can exceed the number of records in `metadata`.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  group_by(continent) %>% 
  summarise(n_records = n()) %>% 
  arrange(desc(n_records)) %>% 
  rename(`Number of records` = n_records) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
continent Number of records
Europe 2182
Asia 470
Africa 205
North America 156
South America 150
Oceania 62
Antarctica 7
Total 3232
Code
# Europe: number of records per kingdom.
# Multi-continent records are expanded to one row per continent before the
# filter, so a record shared with another continent is still counted here.
# Records with a missing kingdom form their own group in the table.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  filter(continent == 'Europe') %>% 
  group_by(kingdom) %>% 
  summarise(n_records = n()) %>% 
  arrange(desc(n_records)) %>% 
  rename(`Number of records` = n_records) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom Number of records
Animalia 1550
Plantae 549
Fungi 75
Protozoa 6
Chromista 1
1
Total 2182
Code
# Asia: number of records per kingdom.
# Multi-continent records are expanded to one row per continent before the
# filter, so a record shared with another continent is still counted here.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  filter(continent == 'Asia') %>% 
  group_by(kingdom) %>% 
  summarise(n_records = n()) %>% 
  arrange(desc(n_records)) %>% 
  rename(`Number of records` = n_records) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom Number of records
Animalia 277
Plantae 175
Fungi 18
Total 470
Code
# Country polygons (level 0) from geodata; files are stored under 'data/'.
world <- geodata::world(resolution = 3, level = 0, path = 'data/')

# Number of distinct sources per country, attached to the country polygons.
# Countries without any source keep n_sources = 0 and a missing iso_2_string.
world_records <- left_join(
  st_as_sf(world),
  metadata %>%
    # iso_3 may hold several codes written as 'AAA | BBB'. Split on the pipe
    # AND strip the surrounding blanks: a padded code such as ' BBB' would
    # never match GID_0, silently dropping multi-country sources from the map.
    separate_rows(iso_3, sep = '\\s*\\|\\s*') %>% 
    mutate(iso_3 = str_squish(iso_3)) %>% 
    select(iso_2, GID_0 = iso_3, source_name),
  by = 'GID_0') %>% 
  group_by(GID_0, NAME_0) %>% 
  # n_distinct() returns 0 for countries whose only row is the unmatched
  # (all-NA) one produced by the left join.
  summarise(n_sources = n_distinct(source_name, na.rm = TRUE),
            iso_2_string = if (n_sources > 0) {
              paste(iso_2, collapse = ';')
            } else {
              NA_character_
            }) %>% 
  ungroup() %>% st_cast() %>% st_set_crs(4326)

# Upper class limit of the legend. It follows the data so that the country
# with the most sources can never fall outside the last class; 979 (the
# previously hard-coded limit) is kept as the minimum.
top_break <- max(c(979, world_records$n_sources), na.rm = TRUE)

# Choropleth of the number of sources per country; countries with no source
# are set to NA so they are drawn in grey and labelled '0' in the legend.
plot_figure_1 <- tm_shape(world_records %>% 
           select(-iso_2_string) %>% 
           mutate(n_sources = ifelse(n_sources == 0,
                                     NA, n_sources))) +
  tm_polygons(fill = 'n_sources', fill_alpha = 0.9,
              col = 'grey40', col_alpha = 0.2,
              fill.scale = tm_scale_intervals(n = 6, 
                                              #style = 'jenks', 
                                              breaks = c(1, 5, 10, 20, 100,
                                                         top_break),
                                              values = 'brewer.reds',
                                              value.na = 'grey80',
                                              label.na = '0'),
              fill.legend = tm_legend(item.space = 0, item.na.space = 0,
                                      title = 'Number of sources', 
                                      reverse = TRUE, 
                                      # frame = FALSE, 
                                      frame.lwd = 0.1,
                                      bg.color = 'white')) +
  tm_layout(legend.outside = TRUE, 
            legend.position = c('left', 'bottom'), frame = FALSE) +
  tm_crs(property = 'global')

# Static version of the map.
tmap_mode('plot')
plot_figure_1

Code
# Switch tmap to its 'view' mode and draw the same map object again.
tmap_mode('view')
plot_figure_1

Taxonomic coverage

Code
# by kingdom
# Number of records per kingdom over the whole table, largest first, with a
# 'Total' row. Records with a missing kingdom form their own group.
metadata %>%
  group_by(kingdom) %>% 
  summarise(n_records = n()) %>% 
  arrange(desc(n_records)) %>% 
  rename(`Number of records` = n_records) %>% 
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom Number of records
Animalia 2188
Plantae 899
Fungi 105
Protozoa 6
Chromista 1
1
Total 3200
Code
# animalia
# Five animal classes covered by the largest number of sources.
metadata %>% 
  filter(!is.na(class)) %>% 
  filter(kingdom %in% c('Animalia')) %>% 
  group_by(kingdom, class) %>% 
  # The column is reported as 'Number of sources', so count distinct sources
  # (as the Plantae table and the phylum/order plots do) rather than rows:
  # n() would count one source several times when it has several records.
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop') %>% 
  arrange(desc(n_sources_taxa)) %>% 
  slice_head(n = 5) %>% 
  rename(`Number of sources` = n_sources_taxa) %>% 
  # The 'Total' row is the sum of the five rows shown; a source covering two
  # of these classes contributes to both.
  adorn_totals('row') %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom class Number of sources
Animalia Insecta 854
Animalia Mammalia 202
Animalia Aves 185
Animalia Reptilia 148
Animalia Amphibia 133
Total - 1522
Code
# plantae
# Five plant orders covered by the largest number of distinct sources.
# Only records that have an order assigned are considered.
metadata %>% 
  filter(!is.na(order)) %>% 
  filter(kingdom %in% c('Plantae')) %>% 
  group_by(kingdom, order) %>% 
  # Drop the grouping explicitly: only one kingdom is left after the filter,
  # so taking the first five rows of the ungrouped table is the same top five.
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop') %>% 
  arrange(desc(n_sources_taxa)) %>% 
  slice_head(n = 5) %>% 
  rename(`Number of sources` = n_sources_taxa) %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom order Number of sources
Plantae Charales 18
Plantae Asparagales 5
Plantae Arecales 4
Plantae Caryophyllales 2
Plantae Malvales 2
Code
# fish sources
# Number of distinct sources whose `taxa` label mentions 'fish'
# (case-insensitive), excluding labels that mention 'crayfish'.
# NOTE(review): this is a substring match, so any other label containing
# 'fish' (e.g. 'jellyfish', 'starfish') would also be counted - confirm that
# no such label occurs in `taxa`.
metadata %>% 
  filter(grepl('fish', taxa, ignore.case = TRUE)) %>% 
  filter(!grepl('crayfish', taxa, ignore.case = TRUE)) %>% 
  distinct(source_name) %>% count() %>% 
  rename(`Number of fish sources` = n) %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of fish sources
102
Code
# Bar chart: distinct sources per animal phylum, bars ordered from the most
# to the least covered phylum. Records without a phylum are left out.
metadata %>% 
  filter(kingdom == 'Animalia', !is.na(phylum)) %>% 
  group_by(kingdom, phylum) %>% 
  summarise(n_sources_taxa = n_distinct(source_name)) %>% 
  ggplot(aes(y = n_sources_taxa,
             x = reorder(phylum, -n_sources_taxa))) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels so long phylum names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Code
# Bar chart: distinct sources per phylum for plants and fungi, one panel per
# kingdom with independent axes. Records without a phylum are left out.
metadata %>% 
  filter(kingdom %in% c('Plantae', 'Fungi'), !is.na(phylum)) %>% 
  group_by(kingdom, phylum) %>% 
  summarise(n_sources_taxa = n_distinct(source_name)) %>% 
  ggplot(aes(y = n_sources_taxa,
             x = reorder(phylum, -n_sources_taxa))) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels so long phylum names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Code
# Bar chart: distinct sources per animal order, bars ordered from the most
# to the least covered order. Records without an order are left out.
metadata %>% 
  filter(kingdom == 'Animalia', !is.na(order)) %>% 
  group_by(kingdom, order) %>% 
  summarise(n_sources_taxa = n_distinct(source_name)) %>% 
  ggplot(aes(y = n_sources_taxa,
             x = reorder(order, -n_sources_taxa))) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels so long order names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Code
# Bar chart: distinct sources per order for plants and fungi, one panel per
# kingdom with independent axes. Records without an order are left out.
metadata %>% 
  filter(kingdom %in% c('Plantae', 'Fungi'), !is.na(order)) %>% 
  group_by(kingdom, order) %>% 
  summarise(n_sources_taxa = n_distinct(source_name)) %>% 
  ggplot(aes(y = n_sources_taxa,
             x = reorder(order, -n_sources_taxa))) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels so long order names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Temporal coverage

Code
# Bar chart: number of distinct sources published per year, shown for
# 1975-2025.
metadata %>% 
  group_by(year) %>% 
  summarise(publications_year = n_distinct(source_name)) %>% 
  ggplot(aes(x = year, y = publications_year)) +
  geom_bar(stat = "identity", fill = "#4CAF50") +
  ylim(c(0, 150)) +
  scale_x_continuous(n.breaks = 15) +
  # The year window is applied as a zoom on the coordinate system. Setting it
  # with xlim() does not work here: xlim() adds an x scale, and the
  # scale_x_continuous() call replaces that scale, discarding the limits.
  # Zooming also keeps the bars of the first and last year intact.
  coord_cartesian(xlim = c(1975, 2025)) +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean()

Save file

Code
# Write the audited table to disk; missing values become empty fields.
readr::write_csv(metadata, file = 'data/metadata.csv', na = '')